* Start from an empty workspace: remove any dataset currently held in memory
clear  

*****************************************************************************
* PROCESSES VARIOUS NOMENCLATURES
*****************************************************************************

**********************************************************************
* NAF English - December 2020
**********************************************************************

* Import the CSV file containing the NAF rev. 2 classification with English
* and French labels. Options: comma delimiter, keep the capitalisation of the
* header names, read variable names from the first row, and replace whatever
* data is in memory.
insheet using "${nomenclature}Nomenclatures-NAF-rev-2_anglais.csv", delim(",") case names clear

* Keep the code column (v4) and the two label columns, put them first, and
* give them short names: NAF (code), label_en (English), label_fr (French).
keep v4 intitulésdelaNAFrév2anglais intitulésdelaNAFrév2français
order v4 intitulésdelaNAFrév2anglais intitulésdelaNAFrév2français
rename (intitulésdelaNAFrév2anglais intitulésdelaNAFrév2français) (label_en label_fr)
rename v4 NAF

* Strip the dots and the "SECTION " prefix from the codes.
replace NAF = subinstr(NAF, ".", "", .)
replace NAF = subinstr(NAF, "SECTION ", "", .)

* Lowercase the English label only; the French label is left as imported.
replace label_en = lower(label_en)

* Write the result as CSV and as a Stata dataset. The replace option is needed
* on the CSV export as well: without it the script stops with a
* "file already exists" error on every run after the first one.
export delimited using "${dta}\APElibel_en.csv", replace
save "${dta}\APElibel_en.dta", replace

**********************************************************************
* PCS (Professions and Socio-professional Categories)
**********************************************************************

clear

* Import the Excel correspondence table between PCS 1982 and PCS 2003.
* firstrow takes the variable names from the first row; allstring reads
* every column as a string.
import excel using "${nomenclature}PCS_corresp_82_2003.xls", firstrow allstring clear

* Only the 2003 code, its label and the ceo column are needed here;
* rows without a 2003 code or label are discarded.
keep code2003 libellé2003 ceo
drop if code2003 == "" | libellé2003 == ""

* Retain one row per 2003 code. The sort is made stable so that the row kept
* is always the first one in spreadsheet order: a plain "bysort" breaks ties
* arbitrarily, so the retained label/ceo could differ from one run to the next
* whenever a code appears on several rows.
sort code2003, stable
by code2003: keep if _n == 1 // 412 PCS codes in 2003

* Rename variables for clarity
rename (code2003 libellé2003) (pcs libel03)

* Convert 'ceo' to numeric and replace missing values with 0
destring ceo, replace
replace ceo = 0 if missing(ceo)

* Save cleaned dataset
save "${nomenclature}pcs03.dta", replace

* Create a list of unique PCS codes; i is the position of each code in the
* list (data are sorted by code at this point)
keep pcs
gen i = _n
rename pcs PCS
save "${nomenclature}pcs03_list.dta", replace

* Import the same workbook again to build the PCS 1982 to PCS 2003 mapping
clear
import excel using "${nomenclature}PCS_corresp_82_2003.xls", firstrow allstring clear

* Only the two code columns are needed; rows missing either code are dropped.
keep code1982 code2003
drop if code1982 == "" | code2003 == ""

* Retain one row per 1982 code. A 1982 code may appear on several rows with
* different 2003 codes; the stable sort guarantees that the 2003 code kept is
* always the first one in spreadsheet order. With a plain "bysort" the tie is
* broken arbitrarily and the mapping is not reproducible across runs.
sort code1982, stable
by code1982: keep if _n == 1 // 353 PCS codes in 1982

rename (code1982 code2003) (pcs82 pcs)

* Both codes were imported as strings (allstring), so this should leave them
* unchanged; kept as a safeguard in case the import options change.
tostring pcs82 pcs, force replace

* saveold writes the file in an older .dta format
saveold "${nomenclature}pcs82.dta", replace

**********************************************************************
* APE (Principal Activity Code)
**********************************************************************

clear

* Read the short NAF rev. 2 labels (comma-separated, header row gives the
* variable names, capitalisation of the names preserved)
insheet using "${nomenclature}int_courts_naf_rev_2.csv", names case delim(",")

* Work only with the code and its label, the latter under a shorter name
rename IntitulsdelaNAFrv2versionfinale APElib
keep Code APElib

* Discard rows whose code is at most one character long
drop if strlen(Code) <= 1

* Remove the dots from the codes, then keep the first row for each code
replace Code = subinstr(Code, ".", "", .)
duplicates drop Code, force

save "${nomenclature}APElibel.dta", replace

* Build the comprehensive list of APE codes from the five-level NAF 2008 table
clear
insheet using "${nomenclature}naf2008_5_niveaux.csv", names case delim(",")

* Only the NIV5 column is used; it is stored under the name APE
keep NIV5
rename NIV5 APE

* Strip the dots from the codes and keep each resulting code once
replace APE = subinstr(APE, ".", "", .)
duplicates drop APE, force

save "${nomenclature}naf2008_5_niveaux.dta", replace

* Load the five-level table once more, this time reading every column as a string
clear
import delimited using "${nomenclature}naf2008_5_niveaux.csv", stringcols(_all)

* Strip the dots from the codes of levels 5 down to 2 (niv1 is left untouched)
foreach lvl of varlist niv5 niv4 niv3 niv2 {
    replace `lvl' = subinstr(`lvl', ".", "", .)
}

* One row per level-5 code
duplicates drop niv5, force

* Label every level variable "APE rev2 niv<k>"
forvalues k = 5(-1)1 {
    label variable niv`k' "APE rev2 niv`k'"
}

save "${output_stata}\nafniveauxrev2.dta", replace

* Derive the level-3 table: one row per niv3 code together with its niv1 value
keep niv1 niv3
duplicates drop niv3, force
save "${output_stata}\nafniveauxrev2_naf3.dta", replace
